*************************** read the original data**************************
****************************************************************
foreach x in imp exp{
forv i=0/9{
local year=2000+`i'
qui insheet using "${hmrc_datadir}\Trade\\`x'0`i'.csv", clear
qui gen year=`year'
qui compress
qui save "${hmrc_datadir}\Trade\\`x'_`year'", replace
}

forv i=10/13{
local year=2000+`i'
qui insheet using "${hmrc_datadir}\Trade\\`x'`i'.csv", clear
qui gen year=`year'
qui compress
qui save "${hmrc_datadir}\Trade\\`x'_`year'", replace
}
forv i=96/99{
local year=1900+`i'
qui insheet using "${hmrc_datadir}\Trade\\`x'`i'.csv", clear
qui gen year=`year'
qui compress
qui save "${hmrc_datadir}\Trade\\`x'_`year'", replace
}
}

foreach x in arr dis{
forv i=5/9{
local year=2000+`i'
qui insheet using "${hmrc_datadir}\Trade\\`x'0`i'.csv", clear
qui gen year=`year'
qui compress
qui save "${hmrc_datadir}\Trade\\`x'_`year'", replace
}

forv i=10/13{
local year=2000+`i'
qui insheet using "${hmrc_datadir}\Trade\\`x'`i'.csv", clear
qui gen year=`year'
qui compress
qui save "${hmrc_datadir}\Trade\\`x'_`year'", replace
}
}


***merge with country lookups
foreach x in exp imp{
forv year=2000/2009{
qui use "${hmrc_datadir}\Trade\\`x'_`year'", clear
local i=year-2000
merge m:1 codseq using "${hmrc_datadir}\Trade\countries0`i'"
keep if _merge==3
drop _merge
save, replace
}

forv year=2010/2012{
qui use "${hmrc_datadir}\Trade\\`x'_`year'", clear
local i=year-2000
merge m:1 codseq using "${hmrc_datadir}\Trade\countries`i'"
keep if _merge==3
drop _merge
save, replace
}

forv year=1996/1999{
qui use "${hmrc_datadir}\Trade\\`x'_`year'", clear
local i=year-1900
merge m:1 codseq using "${hmrc_datadir}\Trade\countries`i'"
keep if _merge==3
drop _merge
save, replace
}
*country look up 2013 not available,  use 2012 table instead.
qui use "${hmrc_datadir}\Trade\\`x'_2013", clear
merge m:1 codseq using "${hmrc_datadir}\Trade\countries12"
drop if _merge==2
drop _merge
save, replace
}

foreach x in arr dis{
forv year=2005/2009{
qui use "${hmrc_datadir}\Trade\\`x'_`year'", clear
local i=year-2000
merge m:1 codseq using "${hmrc_datadir}\Trade\countries0`i'"
keep if _merge==3
drop _merge
save, replace
}

forv year=2010/2012{
qui use "${hmrc_datadir}\Trade\\`x'_`year'", clear
local i=year-2000
merge m:1 codseq using "${hmrc_datadir}\Trade\countries`i'"
keep if _merge==3
drop _merge
save, replace
}

*country look up 2013 not available,  use 2012 table instead.
qui use "${hmrc_datadir}\Trade\\`x'_2013", clear
merge m:1 codseq using "${hmrc_datadir}\Trade\countries12"
drop if _merge==2
drop _merge
save, replace
}

***append years

foreach x in arr dis{
qui use "${hmrc_datadir}\Trade\\`x'_2005", replace
forv year=2006/2013{
qui append using "${hmrc_datadir}\Trade\\`x'_`year'.dta"
}
qui save "${hmrc_datadir}\Trade\\`x'", replace
}

foreach x in exp imp{
qui use "${hmrc_datadir}\Trade\\`x'_1996", replace
forv year=1997/2013{
qui append using "${hmrc_datadir}\Trade\\`x'_`year'.dta", force
}
qui save "${hmrc_datadir}\Trade\\`x'", replace
}


sum year
codebook country */
foreach x in arr dis imp exp{
use "${hmrc_datadir}\Trade\\`x'.dta" , clear
keep traderid
duplicates drop
sort traderid
drop if traderid=="BTTA" | traderid=="CI TRADE"| traderid=="CRUDE OIL ESTIMATE"| traderid=="FRAUD ESTIMATE"| traderid=="IOM TRADE"| traderid=="LVT"| traderid=="POSTAL"| traderid=="UNKNOWN"
save "${hmrc_datadir}\Trade\\`x'id.dta", replace
}
use  "${hmrc_datadir}\Trade\arrid.dta", clear
foreach x in dis imp exp{
append using "${hmrc_datadir}\Trade\\`x'id.dta"
}
save "${hmrc_datadir}\Trade\trade_id.dta", replace
duplicates drop 
gen branch=substr(traderid, -13,.)
gen traderid_nobranch=subinstr(traderid, branch, "",.)
save "${hmrc_datadir}\Trade\trade_id.dta", replace

/*****************************************************************************************************************
*******************************************clean and prepare the data

*********************************************
use "${hmrc_datadir}\Trade\imp", clear
keep hs4 comcode amend traderid agentid codseq cooseq tradeind cpc suite svalue netmass suppunit location qty3 cbcode prefcode addtype perref flow decrep year country area1 area2 area3 area4 area5 area1a /*drop sitc2 directin portseq entrydte ctr
 job mot imot sitc suppress  _merge  currinv*/

keep if year<2012&year>1999

gen developed=(area1==17|area2==17|area3==17|area4==17|area5==17)
gen tdeveloping=(area1==16|area2==16|area3==16|area4==16|area5==16)
gen cdeveloping=(area1==15|area2==15|area3==15|area4==15|area5==15)
gen efta=(area1==11|area2==11|area3==11|area4==11|area5==11)
gen oil=(area1==10|area2==10|area3==10|area4==10|area5==10)
drop area*
*keep only years in perref
sum perref
replace perref=int(perref/100)
gen diff=year-perref
tab diff
drop diff

*make the name of countries consistent
gen y=-year
sort codseq y
drop y
bysort codseq: gen n=_n==1
replace country=country[_n-1] if n==0
drop n
save "${hmrc_datadir}\Trade\imp_sampleclean", replace

use "${hmrc_datadir}\Trade\imp_sampleclean", clear
tab netmass if svalue==0
tab suppunit if svalue==0
drop if svalue==0
drop if country=="Low Value Trade"
tab decrep if traderid=="UNKNOWN"
codebook agentid if traderid=="UNKNOWN"
drop if traderid=="UNKNOWN"
save "${hmrc_datadir}\Trade\imp_sampleclean1", replace

/**********collapse strategy1: collpase all svalue according to unique comcode traderid etc***********
use "${hmrc_datadir}\Trade\imp_sampleclean1", clear
*clean prefcode
replace prefcode=0 if prefcode==. 
*correct cbcode
tab cbcode if cbcode>13
replace cbcode=cbcode/10 if cbcode>13
*collapse goods from same company going to the same country in the same year
collapse (sum) svalue netmass suppunit qty3, by (hs4 traderid comcode codseq cooseq cpc tradeind suite location cbcode prefcode addtype flow agentid decrep developed tdeveloping cdeveloping efta oil year country)
tab cbcode if svalue<0
tab efta if svalue<0
replace cbcode=1 if svalue<0  /*cbcode=1 is for the efta countries, here the efta are all 1 means here the cbcode should be 1 instead of 10*/
collapse (sum) svalue netmass suppunit qty3, by (hs4 traderid comcode codseq cooseq cpc tradeind suite location cbcode prefcode addtype flow agentid decrep developed tdeveloping cdeveloping efta oil year country)
tab cbcode if netmass<0
tab efta if netmass<0
replace cbcode=1 if netmass<0
collapse (sum) svalue netmass suppunit qty3, by (hs4 traderid comcode codseq cooseq cpc tradeind suite location cbcode prefcode addtype flow agentid decrep developed tdeveloping cdeveloping efta oil year country)
sum svalue netmass suppunit qty3
count if svalue==0
sum netmass suppunit qty3 if svalue==0
drop if svalue==0


/*duplicates tag traderid hs4 codseq, generate(m)

drop if netmass==0&suppunit==0&qty3==0&m==0*/


gen upweight=svalue/netmass
gen upquantity=svalue/suppunit

save "${hmrc_datadir}\Trade\imp_collapsed", replace


*************collpase stragetry 2: collpase all positive svalue together and all negative svalue together by traderid comcode etc**********
/*restore
keep if svalue<0
collapse (sum) svalue netmass suppunit qty3, by (hs4 traderid comcode codseq cooseq cpc tradeind suite location cbcode prefcode addtype flow agentid decrep developed tdeveloping cdeveloping efta oil year country)
tab cbcode if svalue<0
tab cbcode if netmass<0
tab efta if svalue<0
tab efta if netmass<0 /*all efta are 1, suggest that cbcode should be 1 instead of 10*/
replace cbcode=1 if svalue<0
replace cbcode=1 if netmass<0
collapse (sum) svalue netmass suppunit qty3, by (hs4 traderid comcode codseq cooseq cpc tradeind suite location cbcode prefcode addtype flow agentid decrep developed tdeveloping cdeveloping efta oil year country)

save "${hmrc_datadir}\Trade\imp_collapsednegative", replace
append using "${hmrc_datadir}\Trade\imp_collapsedpositive"*/

*Note: A Stragetry 3 was written in trade_try1 do file.

***************************************************************/
use "${hmrc_datadir}\Trade\exp", clear
keep hs4 comcode traderid decrep agentid amend codseq tradeind cpc suite svalue netmass suppunit location perref flow year country area1 area2 area3 area4 area5 area1a /*drop sitc2 portseq directin entrydte ctr job mot imot sitc suppress 
_merge depdate currinv*/

keep if year<2012&year>1999

gen developed=(area1==17|area2==17|area3==17|area4==17|area5==17)
gen tdeveloping=(area1==16|area2==16|area3==16|area4==16|area5==16)
gen cdeveloping=(area1==15|area2==15|area3==15|area4==15|area5==15)
gen efta=(area1==11|area2==11|area3==11|area4==11|area5==11)
gen oil=(area1==10|area2==10|area3==10|area4==10|area5==10)
drop area*
*keep only years in perref
sum perref
replace perref=int(perref/100)
gen diff=year-perref
tab diff
drop diff
drop perref /*if no difference with year*/

*make the name of countries consistent
gen y=-year
sort codseq y
drop y
bysort codseq: gen n=_n==1
replace country=country[_n-1] if n==0
drop n
save "${hmrc_datadir}\Trade\exp_sampleclean", replace

use "${hmrc_datadir}\Trade\exp_sampleclean", clear
tab netmass if svalue==0
tab suppunit if svalue==0
drop if svalue==0
drop if country=="Low Value Trade"
tab decrep if traderid=="UNKNOWN"
codebook agentid if traderid=="UNKNOWN"
drop if traderid=="UNKNOWN"
save "${hmrc_datadir}\Trade\exp_sampleclean1", replace

*collapse goods from same company going to the same country in the same year
use "${hmrc_datadir}\Trade\exp_sampleclean1", clear
/*count if netmass==0&suppunit==0
sum svalue if netmass==0&suppunit==0

preserve
keep if netmass==0&suppunit==0
collapse (sum) svalue netmass suppunit , by ( traderid codseq year comcode hs4 tradeind cpc suite location flow agentid decrep country developed tdeveloping cdeveloping efta oil)
sum svalue
drop if svalue==0 /*left all positive svalues*/
save "${hmrc_datadir}\Trade\exp_0unit",replace

restore
drop if netmass==0&suppunit==0

preserve
keep if svalue<0
count
tab amended 
collapse (sum) svalue netmass suppunit , by ( traderid codseq year comcode hs4 tradeind cpc suite location flow agentid decrep country developed tdeveloping cdeveloping efta oil)
count
gen up= svalue/ suppunit
gen wp= svalue/ netmass
browse hs4 if wp==.
browse hs4 if up==.
count if wp==.&up==.
sum up wp
save "${hmrc_datadir}\Trade\exp_negcollpased",replace

restore
keep if svalue>0
count
append using "${hmrc_datadir}\Trade\exp_0unit"
collapse (sum) svalue netmass suppunit , by ( traderid codseq year comcode hs4 tradeind cpc suite location flow agentid decrep country developed tdeveloping cdeveloping efta oil)
drop if netmass==0&suppunit==0
gen up= svalue/ suppunit
gen wp= svalue/ netmass
count if wp==.&up==.
sum up wp
browse hs4 if wp==.
browse hs4 if up==.
append using "${hmrc_datadir}\Trade\exp_negcollpased"

save "${hmrc_datadir}\Trade\exp_poscollpased",replace*/


collapse (sum) svalue netmass suppunit , by ( traderid codseq year comcode hs4 tradeind cpc suite location flow country developed tdeveloping cdeveloping efta)
sum svalue netmass suppunit
count if svalue==0
count if netmass==0
count if suppunit==0
drop if svalue==0
count if netmass==0&suppunit==0
gen upweight=svalue/netmass
gen upquatity=svalue/suppunit
save "${hmrc_datadir}\Trade\exp_collapsed", replace
*************************************************************************
use "${hmrc_datadir}\Trade\arr.dta" , clear
keep hs4 traderid comcode amend codseq tradeind svalue netmass suppunit perref ancosts consol suite flow year country area1 area2 area3 area4 area5 area1a netmassest
drop if traderid=="UNKNOWN"
drop if country=="Low Value Trade"
keep if year<2012&year>1999
drop if svalue==0
drop if traderid=="BTTA"


count if svalue<0
count if svalue<0 &amend!=0
count if svalue<0 &amend==0
sort codseq traderid year comcode svalue

gen developed=(area1==17|area2==17|area3==17|area4==17|area5==17)
gen tdeveloping=(area1==16|area2==16|area3==16|area4==16|area5==16)
gen cdeveloping=(area1==15|area2==15|area3==15|area4==15|area5==15)
gen efta=(area1==11|area2==11|area3==11|area4==11|area5==11)
gen oil=(area1==10|area2==10|area3==10|area4==10|area5==10)
tab efta
drop efta
tab oil
gen ec=(area1==1|area2==1|area3==1|area4==1|area5==1)
drop oil
tab developed
tab ec
tab tdeveloping
tab cdeveloping
drop tdeveloping
drop cdeveloping
tab country if ec==0
tab country if developed==1
drop developed
drop area*


collapse (sum) svalue netmass suppunit, by (hs4 comcode traderid codseq tradeind consol suite flow year country ec)
sum svalue netmass suppunit
sum netmass suppunit if svalue==0
drop if svalue==0
gen upweight=svalue/netmass
gen upquantity=svalue/suppunit
save "${hmrc_datadir}\Trade\arr_collapsed.dta", replace
**************************************************************
use "${hmrc_datadir}\Trade\dis.dta" , clear
keep hs4 traderid comcode amend codseq tradeind svalue netmass suppunit perref notrans ancosts consol suite flow agentid year country area1 area2 area3 area4 area5 area1a netmassest
drop if traderid=="UNKNOWN"
drop if country=="Low Value Trade"
keep if year<2012&year>1999
drop if svalue==0
drop if traderid=="BTTA"

sort codseq traderid year comcode svalue

gen developed=(area1==17|area2==17|area3==17|area4==17|area5==17)
gen tdeveloping=(area1==16|area2==16|area3==16|area4==16|area5==16)
gen cdeveloping=(area1==15|area2==15|area3==15|area4==15|area5==15)
gen efta=(area1==11|area2==11|area3==11|area4==11|area5==11)
gen oil=(area1==10|area2==10|area3==10|area4==10|area5==10)
tab efta
drop efta
tab oil
gen ec=(area1==1|area2==1|area3==1|area4==1|area5==1)
drop oil
tab developed
tab ec
tab tdeveloping
tab cdeveloping
drop tdeveloping
drop cdeveloping
tab country if ec==0
tab country if developed==1
drop developed
drop area*

collapse (sum) svalue netmass suppunit, by (hs4 comcode traderid codseq tradeind consol suite flow year country ec)
sum svalue netmass suppunit
sum netmass suppunit if svalue==0
drop if svalue==0
gen upweight=svalue/netmass
gen upquantity=svalue/suppunit
save "${hmrc_datadir}\Trade\dis_collapsed.dta", replace
***************************************************************************************************************************************
********************************************************************************************************************************
**************************creat a small version dropping all the special code, one row for each trader comodity year**********
foreach i in dis arr imp exp{
use "${hmrc_datadir}\Trade\\`i'_collapsed.dta", clear
*pseudo diamonds; ships and aircraft stores; non-monetary and monetary gold and gold coins
drop if suite!=0
collapse (sum) svalue netmass suppunit, by (hs4 comcode traderid flow year country)
gen upweight=svalue/netmass
gen upquantity=svalue/suppunit
save "${hmrc_datadir}\Trade\\`i'_collapsed_reduced.dta", replace
}

use "${hmrc_datadir}\Trade\arr_collapsed_reduced.dta", clear
foreach i in dis imp exp{
append using "${hmrc_datadir}\Trade\\`i'_collapsed_reduced.dta"
save "${hmrc_datadir}\Trade\total_collapsed_reduced.dta", replace
}
use "${hmrc_datadir}\Trade\total_collapsed_reduced.dta", clear
gen branch=substr( traderid, -13,.)
browse if branch==""
tab traderid if branch==""
gen traderid_nobranch=subinstr( traderid , branch,"",.)
drop hs4
save "${hmrc_datadir}\Trade\total_collapsed_reduced.dta", replace
